"""
Parse the volume (vol) web pages to obtain the URL of each issue.
"""
import facade
from parsel import Selector
from xjlibrary.our_file_dir import BaseDir

# Paths used by the parser. NOTE(review): the BaseDir helpers are defined in
# xjlibrary; the descriptions below are inferred from their names — confirm.
# Presumably the absolute directory that contains this script.
curpath = BaseDir.get_file_dir_absolute(__file__)
# Presumably an ancestor of curpath (the -2 looks like "two levels up").
TopPath = BaseDir.get_upper_dir(curpath, -2)
# Database configuration file "db.ini", located relative to curpath.
configfile = BaseDir.get_new_path(curpath, "db.ini")
# Directory holding the saved volume pages that ParaVol.select() iterates over.
dirPath = BaseDir.get_new_path(TopPath, "download", "intlpress", "download", "volsissue")


class ParaVol(object):
    """Parse saved volume pages and store the issue links they contain.

    Every file found under ``dirPath`` is expected to be named
    ``<jid>_<vol>...``. The matching row of the ``vol`` table supplies the
    year, and one row per issue link in the page is written to the ``issue``
    table.
    """

    def __init__(self):
        """Create the logger and the MySQL helper used by :meth:`select`.

        The MySQL helper is built from ``configfile`` and the name ``"db"``.
        """
        self.logger = facade.get_streamlogger()
        self.mysqlutils = facade.MysqlUtiles(configfile, "db", logger=self.logger)

    def select(self):
        """Read each volume page in ``dirPath`` and write its issues to the DB.

        For every file:

        1. ``jid`` and ``vol`` are taken from the first two ``_``-separated
           parts of the file name (without extension).
        2. The ``vol`` table is queried for that pair to obtain the year.
        3. Each ``div.list_item`` under the element with ``id="list"`` yields
           an issue: the link's ``href`` is the URL and the text after the
           last ``"Number"`` in the link's ``<p>`` is the issue number.
        4. The issue is written with ``replace into issue``.

        Files whose name cannot be split, files with no matching ``vol`` row,
        and list items missing either the link or its text are logged and
        skipped instead of aborting the whole run.
        """
        for path in BaseDir.get_dir_all_files(dirPath):
            self.logger.info(path)
            filename = BaseDir.get_filename_not_extsep(path)

            # Split once; a name without "_" has no volume part to look up.
            parts = filename.split("_")
            if len(parts) < 2:
                self.logger.info(
                    "skip {}: file name is not of the form <jid>_<vol>".format(path))
                continue
            jid = parts[0].strip()
            vol = parts[1].strip()

            # NOTE(review): values are interpolated straight into the SQL text
            # here and in the "replace into" below; a quote character in any
            # value breaks the statement. Switch to parameterized queries if
            # MysqlUtiles supports them.
            sql = "select jid,vol,`year` from vol where jid='{}' and vol='{}'".format(jid, vol)
            rows = self.mysqlutils.SelectFromDB(sql)
            if not rows:
                # No such volume recorded: nothing to attach the issues to.
                self.logger.info(
                    "skip {}: no vol row for jid={} vol={}".format(path, jid, vol))
                continue

            # Prefer the values stored in the database over the file-name parts.
            row = rows[0]
            jid, vol, year = row[0], row[1], row[2]

            html = BaseDir.single_read_file(path)
            selector = Selector(text=html)

            for div in selector.xpath('//*[@id="list"]/div[@class="list_item"]'):
                href = div.xpath('.//a/@href').get()
                text = div.xpath('.//a/p/text()').get()
                # .get() returns None when nothing matches; without this guard
                # a missing text crashes and a missing href is stored as 'None'.
                if href is None or text is None:
                    self.logger.info(
                        "skip list item in {}: missing link or link text".format(path))
                    continue

                # The issue number is whatever follows the last "Number".
                issue = text.split("Number")[-1].strip()
                sql = "replace into issue (url,vol,`year`,jid,issue) values ('%s','%s','%s','%s','%s')" % (
                    href, vol, year, jid, issue)
                self.mysqlutils.ExeSqlToDB(sql)


def main():
    """Build a ParaVol parser and process every saved volume page."""
    ParaVol().select()


# Script entry point: run the parser only when executed directly, not on import.
if __name__ == "__main__":
    main()
